home *** CD-ROM | disk | FTP | other *** search
- title lzcomp - file compressor using limpel-ziv algorithm
-
- ;Tom Pfau
- ;Digital Equipment Corporation
- ;Parsippany, NJ
-
- ;Constants
- clear equ 256 ;Clear code
- eof equ 257 ;End of file marker
- first_free equ 258 ;First free code
- maxmax equ 4096 ;Max code + 1
-
- include macros.mlb
-
- ;Hash table entry
- hash_rec struc
- first dw ? ; First entry with this value
- next dw ? ; Next entry along chain
- char db ? ; Suffix char
- hash_rec ends
-
- ;Declare Segments
- code segment byte public 'code'
- code ends
- stack segment word stack 'data'
- dw 128 dup (?)
- stack ends
- data segment word public 'data'
- data ends
- memory segment para public 'memory'
- hash label hash_rec
- memory ends
-
- ;Start writing code
- code segment
- assume cs:code,ds:data,es:data,ss:stack
-
- start proc far
- mov bx,seg hash ;End of program
- mov ax,ds ;Start of program
- sub bx,ax ;Program size
- inc bx ;Make sure
- setmem bx ;Set our size
- mov bx,data ;Set up data segment addressability
- mov es,bx
- mov ds,bx
- print input_prompt ;Get input file name
- input input_file
- print crlf
- print output_prompt ;And output
- input output_file
- print crlf
- mov al,input_file+1 ;Terminate file names with nulls
- xor ah,ah
- mov si,ax
- mov input_file+2[si],0
- mov al,output_file+1
- mov si,ax
- mov output_file+2[si],0
- hopen input_file+2,0
- mov input_handle,ax
- hcreat output_file+2,0
- mov output_handle,ax
- call compress ;Compress file
- hclose input_handle ;Close in and out
- hclose output_handle
- exit ;Done
- start endp
-
- data segment
- input_prompt db 'Input file: $'
- output_prompt db 'Output file: $'
- input_file db 80,0,80 dup (?)
- output_file db 80,0,80 dup (?)
- crlf db 13,10,'$'
- input_handle dw ?
- output_handle dw ?
- data ends
-
- compress proc near
- malloc 1280 ;Allocate space for hash table
- mov hash_seg,ax ;Save segment address
- l1: call init_table ;Initialize the table and some vars
- mov ax,clear ;Write a clear code
- call write_code
- call read_char ;Read first char
- l4: xor ah,ah ;Turn char into code
- l4a: mov prefix_code,ax ;Set prefix code
- call read_char ;Read next char
- jc l17 ;Carry means eof
- mov k,al ;Save char in k
- mov bx,prefix_code ;Get prefix code
- call lookup_code ;See if this pair in table
- jnc l4a ;nc means yes, new code in ax
- call add_code ;Add pair to table
- push bx ;Save new code
- mov ax,prefix_code ;Write old prefix code
- call write_code
- pop bx
- mov al,k ;Get last char
- cmp bx,max_code ;Exceed code size?
- jl l4 ;less means no
- cmp nbits,12 ;Currently less than 12 bits?
- jl l14 ;yes
- mov ax,clear ;Write a clear code
- call write_code
- call init_table ;Reinit table
- mov al,k ;get last char
- jmp l4 ;Start over
- l14: inc nbits ;Increase number of bits
- shl max_code,1 ;Double max code size
- jmp l4 ;Get next char
- l17: mov ax,prefix_code ;Write last code
- call write_code
- mov ax,eof ;Write eof code
- call write_code
- mov ax,bit_offset ;Make sure buffer is flushed to file
- cmp ax,0
- je l18
- mov cx,8 ;convert bits to bytes
- xor dx,dx
- div cx
- or dx,dx ;If extra bits, make sure they get
- je l17a ;written
- inc ax
- l17a: call flush
- l18: ret
- compress endp
-
- data segment
- hash_seg dw ?
- prefix_code dw ?
- free_code dw ?
- max_code dw ?
- nbits dw ?
- k db ?
- data ends
-
- init_table proc near
- mov nbits,9 ;Set code size to 9
- mov max_code,512 ;Set max code to 512
- push es ;Save seg reg
- mov es,hash_seg ;Address hash table
- mov ax,-1 ;Unused flag
- mov cx,640 ;Clear first 256 entries
- mov di,offset hash ;Point to first entry
- rep stosw ;Clear it out
- pop es ;Restore seg reg
- mov free_code,first_free ;Set next code to use
- ret ;done
- init_table endp
-
- write_code proc near
- push ax ;Save code
- mov ax,bit_offset ;Get bit offset
- mov cx,nbits ;Adjust bit offset by code size
- add bit_offset,cx
- mov cx,8 ;Convert bit offset to byte offset
- xor dx,dx
- div cx
- cmp ax,1020 ;Approaching end of buffer?
- jl wc1 ;less means no
- call flush ;Write the buffer
- push dx ;dx contains offset within byte
- add dx,nbits ;adjust by code size
- mov bit_offset,dx ;new bit offset
- pop dx ;restore dx
- add ax,offset output_data ;Point to last byte
- mov si,ax ;put in si
- mov al,byte ptr [si] ;move byte to first position
- mov output_data,al
- xor ax,ax ;Byte offset of zero
- wc1: add ax,offset output_data ;Point into buffer
- mov di,ax ;Destination
- pop ax ;Restore code
- mov cx,dx ;offset within byte
- xor dx,dx ;dx will catch bits rotated out
- jcxz wc3 ;If offset in byte is zero, skip shift
- wc2: shl ax,1 ;Rotate code
- rcl dx,1
- loop wc2
- or al,byte ptr [di] ;Grab bits currently in buffer
- wc3: stosw ;Save data
- mov al,dl ;Grab extra bits
- stosb ;and save
- ret
- write_code endp
-
- data segment
- bit_offset dw ?
- output_data db 1024 dup (?)
- data ends
-
- flush proc near
- push ax ;Save all registers
- push bx ;AX contains number of bytes to write
- push cx
- push dx
- hwrite output_handle,output_data,ax ;write output file
- pop dx
- pop cx
- pop bx
- pop ax
- ret
- flush endp
-
- read_char proc near
- mov di,input_offset ;Anything left in buffer?
- cmp di,input_size
- jl rd1 ;less means yes
- hread input_handle,input_data,1024 ;Read next chunk of input
- cmp ax,0 ;Anything left?
- je rd2 ;equal means no, finished
- mov input_size,ax ;Save bytes read
- mov input_offset,0 ;Point to beginning of buffer
- mov di,0
- rd1: lea si,input_data[di] ;Point at character
- lodsb ;Read it in
- inc input_offset ;Adjust pointer
- clc ;Success
- ret
- rd2: stc ;Nothing left
- ret
- read_char endp
-
- data segment
- input_data db 1024 dup (?)
- input_offset dw 0
- input_size dw 0
- data ends
-
- lookup_code proc near
- push ds ;Save seg reg
- mov ds,hash_seg ;point to hash table
- call index ;convert code to address
- mov di,0 ;flag
- cmp [si].first,-1 ;Has this code been used?
- je gc4 ;equal means no
- inc di ;set flag
- mov bx,[si].first ;Get first entry
- gc2: call index ;convert code to address
- cmp [si].char,al ;is char the same?
- jne gc3 ;ne means no
- clc ;success
- mov ax,bx ;put found code in ax
- pop ds ;restore seg reg
- ret ;done
- gc3: cmp [si].next,-1 ;More left with this prefix?
- je gc4 ;equal means no
- mov bx,[si].next ;get next code
- jmp gc2 ;try again
- gc4: stc ;not found
- pop ds ;restore seg reg
- ret ;done
- lookup_code endp
-
- index proc near
- mov si,bx ;si = bx * 5 (5 byte hash entries)
- shl si,1 ;si = bx * 2 * 2 + bx
- shl si,1
- add si,bx
- ret
- index endp
-
- add_code proc near
- mov bx,free_code ;Get code to use
- push ds ;point to hash table
- mov ds,hash_seg
- cmp di,0 ;First use of this prefix?
- je ac1 ;equal means yes
- mov [si].next,bx ;point last use to new entry
- jmp short ac2
- ac1: mov [si].first,bx ;Point first use to new entry
- ac2: cmp bx,maxmax ;Have we reached code limit?
- je ac3 ;equal means yes, just return
- call index ;get address of new entry
- mov [si].first,-1 ;initialize pointers
- mov [si].next,-1
- mov [si].char,al ;save suffix char
- inc es:free_code ;adjust next code
- ac3: pop ds ;restore seg reg
- ret
- add_code endp
-
- code ends
-
- end start
-